###############################################################################   
#### Replication Materials                                                 #### 
#### Kim, Nakka, Gopal, Desmarais, Mancinelli, Harden, Ko, Boehmke. 2021.  ####
#### Attention to the COVID-19 pandemic on Twitter:                        ####
#### Partisan differences among U.S. state legislators                     ####
#### Legislative Studies Quarterly                                         ####
###############################################################################  


###############################################################################
################################### Set Up ####################################
###############################################################################

# packages -------------------------

lapply(c("Hmisc", "arm", "readr", "tidyverse", "lme4", "stargazer", "texreg",
         "xtable", "dummies","effects", "ggthemes","caret", "ggeffects", 'plm', 
         "lmtest","scales","ggrepel","sandwich","reshape2", "plyr"), 
       require, 
       character.only = TRUE)

# read respective data sets -------------------------

# directory holding the supplementary CSV inputs (machine-specific)
data_path <- '/Users/taegyoon/Google Drive/spap_state/spap_state_attention/data/' 

# load the four supplementary data sets used by the SI figures below
general <- paste0(data_path, "spap_state_attention_supplementary_general.csv") %>%
  read_csv()
pandemic_agg_state <- paste0(data_path, "spap_state_attention_supplementary_pandemic_state.csv") %>%
  read_csv()
pandemic_agg_national <- paste0(data_path, "spap_state_attention_supplementary_pandemic_national.csv") %>%
  read_csv()
policy_grouped <- paste0(data_path, "spap_state_attention_supplementary_policy.csv") %>%
  read_csv()


###############################################################################
############################### SI Figures 2 - 8 ##############################
###############################################################################

# Figure S2 -------------------------

# (a) pandemic-related tweets
# Output directory for every figure saved in this script. Written as an
# absolute path (like data_path) so that ggsave() does not depend on the
# current working directory.
rep_path <- '/Users/taegyoon/Google Drive/spap_state/spap_state_attention/replication/'
# Tick positions on the logged axis, labelled with the raw counts they map
# back to via exp(x) - 1 (presumably the inverse of how covid_relevant_1_log
# was built -- confirm against the data preparation script).
x_break <- seq(0, 5.5, 0.5)
x_original <- round(exp(x_break)-1)
ggplot(general, 
       aes(x = covid_relevant_1_log)) + 
  geom_histogram(color="gray70", 
                 fill="gray90", 
                 binwidth = 0.5) + 
  theme_calc() +
  scale_x_continuous(breaks = x_break, 
                     labels = x_original) +
  labs(x = "\nCount of COVID-19 Relevant Tweets", 
       y = "Frequency\n") +
  theme(plot.title = element_text(hjust = 0.5),
        text = element_text(size = 10),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10),
        plot.margin = grid::unit(c(3, 3, 3, 3), "mm"))
# ggsave() writes the most recently created plot
ggsave(paste0(rep_path, 'FigS2a.png'),
       dpi = 600,
       width = 5,
       height = 3,
       units = 'in')

# (b) all tweets
# Histogram of log(tweet_count + 1); ticks sit on the log scale and are
# labelled with the raw counts they correspond to.
x_break <- seq(0, 7.5, 1)
x_original <- round(exp(x_break)-1)
ggplot(data = general, 
       mapping = aes(x = log(tweet_count + 1))) + 
  theme_calc() +
  geom_histogram(binwidth = 0.5,
                 colour = "gray70", 
                 fill = "gray90") + 
  scale_x_continuous(labels = x_original,
                     breaks = x_break) +
  xlab("\nCount of Tweets") +
  ylab("Frequency\n") +
  theme(text = element_text(size = 10),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10),
        plot.title = element_text(hjust = 0.5),
        plot.margin = grid::unit(rep(3, 4), "mm"))
ggsave(filename = paste0(rep_path, "FigS2b.png"),
       width = 5,
       height = 3,
       units = "in",
       dpi = 600)

# Figure S3 -------------------------

# (a) political party
# Tabulate the `republican` indicator over the week-14 records and draw the
# resulting counts as a bar chart.
rep <- table(general$republican[which(general$week == 14)])
df_rep <- data.frame(rep)
ggplot(df_rep, 
       aes(x = Var1, 
           y = Freq, 
           fill = Var1)) + 
  geom_col(width = 0.5) +
  scale_fill_manual(values = rep("gray", 2)) +
  xlab("\nPolitical Party") +
  ylab("Frequency\n") +
  theme_calc() +
  theme(text = element_text(size = 10),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10),
        plot.title = element_text(hjust = 0.5),
        plot.margin = grid::unit(rep(3, 4), "mm"),
        legend.position = "none")
ggsave(filename = paste0(rep_path, "FigS3a.png"),
       width = 4,
       height = 3.5,
       units = "in",
       dpi = 600)

# (b) majority status
# Tabulate `major_cham` over the week-14 records; the two table entries are
# relabelled (in table order) as "Minority" and "Majority" before plotting.
major <- table(general$major_cham[which(general$week == 14)])
df_major <- data.frame(major)
df_major$Var1 <- c("Minority", "Majority")
ggplot(df_major, 
       aes(x = Var1, 
           y = Freq, 
           fill = Var1)) + 
  geom_col(width = 0.5) +
  scale_fill_manual(values = rep("gray", 2)) +
  scale_y_continuous(breaks = seq(0, 3000, 500)) +
  xlab("\nMajority Status in Chamber") +
  ylab("Frequency\n") +
  theme_calc() +
  theme(text = element_text(size = 10),
        axis.text.x = element_text(size = 10),
        axis.text.y = element_text(size = 10),
        plot.title = element_text(hjust = 0.5),
        plot.margin = grid::unit(rep(3, 4), "mm"),
        legend.position = "none")
ggsave(filename = paste0(rep_path, "FigS3b.png"),
       width = 4,
       height = 3.5,
       units = "in",
       dpi = 600)

# Figure S4 -------------------------

# (a) weekly state new case
# Histogram of state_case_pop over state-week rows from week 14 onward.
ggplot(dplyr::filter(pandemic_agg_state, week >= 14), 
       aes(x = state_case_pop)) + 
  theme_calc() +
  geom_histogram(binwidth = 5,
                 colour = "darkgray", 
                 fill = "lightgray") + 
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  scale_y_continuous(breaks = scales::pretty_breaks(n = 5)) +
  xlab("\nState New Cases Per-10k") +
  ylab("Frequency\n") +
  theme(text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.text.y = element_text(size = 11),
        plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 0),
        plot.margin = grid::unit(rep(3, 4), "mm"))
ggsave(filename = paste0(rep_path, "FigS4a.png"),
       width = 5.5,
       height = 3.5,
       units = "in",
       dpi = 600)

# (b) weekly state new death
# Histogram of state_death_pop over state-week rows from week 14 onward.
ggplot(dplyr::filter(pandemic_agg_state, week >= 14), 
       aes(x = state_death_pop)) + 
  theme_calc() +
  geom_histogram(binwidth = 0.15,
                 colour = "darkgray", 
                 fill = "lightgray") + 
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  xlab("\nState New Deaths Per-10k") +
  ylab("Frequency\n") +
  theme(text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.text.y = element_text(size = 11),
        plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 0),
        plot.margin = grid::unit(rep(3, 4), "mm"))
ggsave(filename = paste0(rep_path, "FigS4b.png"),
       width = 5.5,
       height = 3.5,
       units = "in",
       dpi = 600)

# (c) weekly national new case
# Histogram of national_case_pop over weekly rows from week 14 onward.
ggplot(dplyr::filter(pandemic_agg_national, week >= 14), 
       aes(x = national_case_pop)) + 
  theme_calc() +
  geom_histogram(binwidth = 1,
                 colour = "darkgray", 
                 fill = "lightgray") + 
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  xlab("\nNational New Cases Per-10k") +
  ylab("Frequency\n") +
  theme(text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.text.y = element_text(size = 11),
        plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 0),
        plot.margin = grid::unit(rep(3, 4), "mm"))
ggsave(filename = paste0(rep_path, "FigS4c.png"),
       width = 5.5,
       height = 3.5,
       units = "in",
       dpi = 600)

# (d) weekly national new death
# Histogram of national_death_pop over weekly rows from week 14 onward.
ggplot(dplyr::filter(pandemic_agg_national, week >= 14), 
       aes(x = national_death_pop)) + 
  theme_calc() +
  geom_histogram(binwidth = 0.05,
                 colour = "darkgray", 
                 fill = "lightgray") + 
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  xlab("\nNew Deaths Per-10k") +
  ylab("Frequency\n") +
  theme(text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.text.y = element_text(size = 11),
        plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 0),
        plot.margin = grid::unit(rep(3, 4), "mm"))
ggsave(filename = paste0(rep_path, "FigS4d.png"),
       width = 5.5,
       height = 3.5,
       units = "in",
       dpi = 600)

# (e) legislator ideology
# Histogram of np_score with one observation per legislator. The first record
# of each screen name is kept, so the result does not rely on the first rows
# of `general` happening to be distinct legislators (it yields the same rows
# when they are).
# NOTE(review): assumes np_score does not vary across a legislator's rows --
# confirm against the data.
ggplot(general[!duplicated(general$user.screen_name), ], 
       aes(x = np_score)) + 
  geom_histogram(color = "darkgray", 
                 fill = "lightgray", 
                 binwidth = 0.3) + 
  theme_calc() +
  scale_x_continuous(breaks = pretty_breaks(6)) +
  labs(x = "\nIdeology", 
       y = "Frequency\n") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 0),
        text = element_text(size = 11),
        axis.text.x= element_text(size = 11),
        axis.text.y= element_text(size = 11),
        plot.margin = grid::unit(c(3, 3, 3, 3), "mm"))
ggsave(paste0(rep_path, 'FigS4e.png'),
       dpi = 600,
       width = 5.5,
       height = 3.5,
       units = 'in')

# (f) weekly state new policy
# Histogram of state_covid_policy over rows from week 14 onward.
ggplot(dplyr::filter(policy_grouped, week >= 14), 
       aes(x = state_covid_policy)) + 
  theme_calc() +
  geom_histogram(binwidth = 1,
                 colour = "darkgray", 
                 fill = "lightgray") + 
  scale_x_continuous(breaks = scales::pretty_breaks(n = 5)) +
  xlab("\nCount of State-level New Policies") +
  ylab("Frequency\n") +
  theme(text = element_text(size = 11),
        axis.text.x = element_text(size = 11),
        axis.text.y = element_text(size = 11),
        plot.title = element_text(hjust = 0.5),
        plot.caption = element_text(hjust = 0),
        plot.margin = grid::unit(rep(3, 4), "mm"))
ggsave(filename = paste0(rep_path, "FigS4f.png"),
       width = 5.5,
       height = 3.5,
       units = "in",
       dpi = 600)

# Figure S5 -------------------------

# (a) weekly national new case (line graph)
# Attach a week-start date to each national row: the first 39 Mondays
# counted from 2020-01-27. Only the last 30 rows are plotted.
pandemic_agg_national$date_start <- head(seq(as.Date("2020-1-27"), 
                                             as.Date("2020-10-30"), 
                                             by = "week"),
                                         39)
ggplot(tail(pandemic_agg_national, 30), 
       aes(x = date_start, y = national_case)) + 
  geom_line(colour = "gray50") + 
  geom_point(fill = "gray25",
             shape = 21) + 
  scale_x_date(breaks = scales::pretty_breaks(n = 10),
               date_labels = "%b") +
  scale_y_continuous(labels = scales::comma) +
  labs(x = "\nTime",
       y = "Count\n") +
  theme_calc() +
  theme(text = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        plot.title = element_text(hjust = 0.5))
ggsave(filename = paste0(rep_path, "FigS5a.png"),
       width = 5.5,
       height = 3.5,
       units = "in",
       dpi = 600)

# (b) weekly national new death (line graph)
# Same layout as panel (a), plotting national_death for the last 30 rows.
ggplot(tail(pandemic_agg_national, 30), 
       aes(x = date_start, y = national_death)) + 
  geom_line(colour = "gray50") + 
  geom_point(fill = "gray25",
             shape = 21) + 
  scale_x_date(breaks = scales::pretty_breaks(n = 10),
               date_labels = "%b") +
  scale_y_continuous(labels = scales::comma) +
  labs(x = "\nTime",
       y = "Count\n") +
  theme_calc() +
  theme(text = element_text(size = 12),
        axis.text.x = element_text(size = 12),
        axis.text.y = element_text(size = 12),
        plot.title = element_text(hjust = 0.5))
ggsave(filename = paste0(rep_path, "FigS5b.png"),
       width = 5.5,
       height = 3.5,
       units = "in",
       dpi = 600)

# Figure S6 -------------------------

# (a) state new case (heat map, population-normalized)
# Keep state-week rows from week 14 onward.
pandemic_agg_state_week_14 <- pandemic_agg_state[which(pandemic_agg_state$week >= 14), ]
# Attach week-start dates; each date is repeated 50 times, so this assumes the
# rows are ordered by week with exactly 50 states per week -- TODO confirm.
pandemic_agg_state_week_14$date_start <- rep(seq(as.Date("2020-3-30"), 
                                                 as.Date("2020-10-19"), 
                                                 "week"), 
                                             each = 50)
# Per-state total of the weekly per-10k case counts.
state_case_total_pop <- pandemic_agg_state_week_14 %>% 
  group_by(state_abbrev) %>% 
  dplyr::summarise(state_case_total = sum(state_case_pop))
state_case_total_pop <- state_case_total_pop[order(state_case_total_pop$state_case_total),] 
# NOTE: sort() re-orders the abbreviations alphabetically, so the ordering by
# total on the previous line does not affect the y-axis order.
level_order_case_pop <- sort(as.vector(state_case_total_pop$state_abbrev))
ggplot(pandemic_agg_state_week_14, 
       aes(date_start,
           # `levels` spelled out in full (was the partially matched `level`)
           factor(state_abbrev, 
                  levels = level_order_case_pop))) + 
  geom_tile(aes(fill = state_case_pop), 
            colour = "gray") +
  # one tick per plotted week
  scale_x_date(date_labels = "%b %d", 
               breaks = as.Date(seq(as.Date("2020-3-30"), 
                                    as.Date("2020-10-19"), 
                                    "week")),
               expand = c(0, 0)) +
  scale_fill_gradient(low = "white", 
                      high = "red", 
                      n.breaks = 8) +  
  guides(fill=guide_legend(title = "Count\n(per 10k)\n")) +
  labs(x = "\nWeek", 
       y = "State\n") +
  theme_calc() + 
  theme(panel.grid.major = element_blank(), 
        legend.position ="right",
        panel.grid.minor = element_blank(),
        legend.title = element_text(size = 8.5),
        legend.text = element_text(size = 8.5),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        axis.text.x = element_text(angle= 90, hjust = 0, size = 8.5),
        axis.text.y = element_text(size = 8.5),
        plot.title = element_text(hjust = 0.5)) 
ggsave(paste0(rep_path, 'FigS6a.png'),
       dpi = 600,
       width = 6,
       height = 6,
       units = 'in')

# (b) state new death (heat map, population-normalized)
# Per-state total of the weekly per-10k death counts (week 14 onward).
state_death_total_pop <- pandemic_agg_state_week_14 %>% 
  group_by(state_abbrev) %>% 
  dplyr::summarise(state_death_total = sum(state_death_pop))
state_death_total_pop <- state_death_total_pop[order(state_death_total_pop$state_death_total),] 
# NOTE: sort() re-orders the abbreviations alphabetically, so the ordering by
# total on the previous line does not affect the y-axis order.
level_order_death_pop <- sort(as.vector(state_death_total_pop$state_abbrev))
ggplot(pandemic_agg_state_week_14, 
       aes(date_start,
           # `levels` spelled out in full (was the partially matched `level`)
           factor(state_abbrev, 
                  levels = level_order_death_pop))) + 
  geom_tile(aes(fill = state_death_pop), 
            colour = "gray") +
  # one tick per plotted week
  scale_x_date(date_labels = "%b %d", 
               breaks = as.Date(seq(as.Date("2020-3-30"), 
                                    as.Date("2020-10-19"), 
                                    "week")),
               expand = c(0, 0)) +
  scale_fill_gradient(low = "white", 
                      high = "red", 
                      n.breaks = 8) +  
  guides(fill = guide_legend(title = "Count\n(per 10k)\n")) +
  labs(x = "\nWeek", 
       y = "State\n") +
  theme_calc() + 
  theme(panel.grid.major = element_blank(), 
        panel.grid.minor = element_blank(),
        legend.position="right",
        legend.title = element_text(size = 8.5),
        legend.text = element_text(size = 8.5),
        axis.title.x = element_text(size = 10),
        axis.title.y = element_text(size = 10),
        axis.text.x = element_text(angle= 90, hjust = 0, size = 8.5),
        axis.text.y = element_text(size = 8.5),
        plot.title = element_text(hjust = 0.5)) 
ggsave(paste0(rep_path, 'FigS6b.png'),
       dpi = 600,
       width = 6,
       height = 6,
       units = 'in')

# Figure S7 -------------------------

# (a) count of states without cases
# Flag state-weeks with zero cases / zero deaths (1 = zero, 0 = otherwise),
# then count the flagged states within each week.
pandemic_agg_state$state_case_zero <- as.numeric(pandemic_agg_state$state_case == 0)
pandemic_agg_state$state_death_zero <- as.numeric(pandemic_agg_state$state_death == 0)
pandemic_weekly <- dplyr::summarise(
  dplyr::group_by(pandemic_agg_state, week),
  state_case_sum = sum(state_case_zero),
  state_death_sum = sum(state_death_zero))
ggplot(pandemic_weekly, 
       aes(x = week, 
           y = state_case_sum)) + 
  geom_line(colour = "gray50") + 
  geom_point(fill = "gray25",
             shape = 21) + 
  scale_x_continuous(breaks = scales::pretty_breaks(n = 10)) +
  labs(x = "\nTime",
       y = "Count\n") +
  theme_calc() +
  theme(text = element_text(size = 12),
        axis.text.x = element_text(angle = 0, 
                                   hjust = 1, 
                                   size = 12),
        axis.text.y = element_text(size = 12),
        plot.title = element_text(hjust = 0.5))
ggsave(filename = paste0(rep_path, "FigS7a.png"),
       width = 5.5,
       height = 3.5,
       units = "in",
       dpi = 600)

# (b) count of states without deaths
# Weekly number of states flagged with zero deaths, drawn as a line graph.
ggplot(pandemic_weekly, 
       aes(x = week, y = state_death_sum)) + 
  geom_line(colour = "gray50") + 
  geom_point(fill = "gray25",
             shape = 21) + 
  scale_y_continuous(labels = scales::comma) +
  labs(x = "\nTime",
       y = "Count\n") +
  theme_calc() +
  theme(text = element_text(size = 12),
        axis.text.x = element_text(angle = 0, 
                                   hjust = 1, 
                                   size = 12),
        axis.text.y = element_text(size = 12),
        plot.title = element_text(hjust = 0.5))
ggsave(filename = paste0(rep_path, "FigS7b.png"),
       width = 5.5,
       height = 3.5,
       units = "in",
       dpi = 600)

# Figure S8 -------------------------

# (a) count of pandemic-related tweets for top-5 states (line graph)
# Weekly mean of covid_relevant_1 over all rows ("National") and within each
# state; only CA, TX, FL, NY and PA are kept from the state-level means.
covid_weekly <- dplyr::summarise(
  dplyr::group_by(general, week),
  National = mean(covid_relevant_1))
covid_weekly_state <- dplyr::summarise(
  dplyr::group_by(general, week, state_abbrev),
  National = mean(covid_relevant_1))
covid_weekly_state_top_five <- covid_weekly_state[
  covid_weekly_state$state_abbrev %in% c('CA', 'TX', 'FL', 'NY', 'PA'), ]
# one column per state, one row per week
covid_weekly_top_five <- spread(covid_weekly_state_top_five,
                                state_abbrev,
                                National)
# bind the national series to the state columns (week column of the latter dropped)
covid_count_df <- data.frame(cbind(covid_weekly, 
                                   covid_weekly_top_five[-1]))
covid_count_df$date_start <- seq(as.Date("2020-3-30"), 
                                 as.Date("2020-10-19"), 
                                 by = 'week')
# long format: one row per date and series
covid_count_df <- gather(
  dplyr::select(covid_count_df,
                date_start, National, CA, FL, PA, NY, TX),
  key = "level", 
  value = "count", 
  -date_start)
covid_count_df$level <- factor(covid_count_df$level, 
                               levels = c("National", "CA", "TX", "FL", "NY", "PA"))
ggplot(covid_count_df,
       aes(x = date_start, 
           y = count, 
           color = level, 
           shape = level)) + 
  geom_line() + 
  geom_point(size = 1.5) + 
  scale_color_stata() +
  scale_x_date(breaks = scales::pretty_breaks(n = 10),
               date_labels = "%b") +
  scale_y_continuous(labels = scales::comma) +
  labs(x = "\nTime",
       y = "Mean Count (per legislator)\n",
       color  = "", 
       linetype = "", 
       shape = "") +
  theme_calc() +
  theme(text = element_text(size = 10),
        axis.text.x = element_text(angle = 0, 
                                   hjust = 1, 
                                   size = 10),
        axis.text.y = element_text(size = 10),
        plot.title = element_text(hjust = 0.5))
ggsave(filename = paste0(rep_path, "FigS8a.png"),
       width = 5.5,
       height = 3.5,
       units = "in",
       dpi = 600)

# (b) count of all tweets for top-5 states (line graph)
# Weekly mean of tweet_count over all rows ("National") and within each state.
entire_weekly <- general %>% 
  group_by(week) %>% 
  dplyr::summarise(National = mean(tweet_count))
entire_weekly_state <- general %>% 
  group_by(week, state_abbrev) %>% 
  dplyr::summarise(National = mean(tweet_count))
# keep only the five states shown in the figure
entire_weekly_state_top_five <- entire_weekly_state[which(entire_weekly_state$state_abbrev=='CA' |
                                                            entire_weekly_state$state_abbrev=='TX' |
                                                            entire_weekly_state$state_abbrev=='FL' |
                                                            entire_weekly_state$state_abbrev=='NY' |
                                                            entire_weekly_state$state_abbrev=='PA'),]
# one column per state, one row per week
entire_weekly_top_five <- entire_weekly_state_top_five %>% spread(state_abbrev, National) 
# Drop the week column of the state table before binding, as in panel (a), so
# the combined frame does not carry a duplicated `week` column.
entire_count_df <- data.frame(cbind(entire_weekly, entire_weekly_top_five[c(-1)]))
entire_count_df$date_start <- seq(as.Date("2020-3-30"), as.Date("2020-10-19"), by='week')
# long format: one row per date and series
entire_count_df <- entire_count_df %>%
  dplyr::select(date_start, National, CA, FL, PA, NY, TX) %>%
  gather(key = "level", value = "count", -date_start)
entire_count_df$level <- factor(entire_count_df$level, 
                                levels = c("National", "CA", "TX", "FL", "NY", "PA"))
ggplot(entire_count_df, aes(x = date_start, y = count, color = level, shape = level)) + 
  theme_calc() +
  geom_line() + 
  geom_point(size = 1.5) +   
  labs(x = "\nTime",
       y = "Mean Count (per legislator)\n",
       color  = "", 
       linetype = "",
       shape = "") +
  scale_x_date(date_labels = "%b", 
               breaks = pretty_breaks(10))+
  scale_y_continuous(labels = comma) +
  theme(axis.text.x = element_text(angle=0, 
                                   hjust = 1, 
                                   size = 10),
        plot.title = element_text(hjust = 0.5),
        text = element_text(size = 10),
        axis.text.y = element_text(size = 10)) +
  scale_color_stata()
ggsave(paste0(rep_path, 'FigS8b.png'),
       dpi = 600,
       width = 5.5,
       height = 3.5,
       units = 'in')